# load libraries
library(tidyverse)
library(ggtree)
library(pheatmap)
library(igraph)
library(regentrans)
library(tidygraph)
library(ggraph)
library(exact2x2)
library(cowplot)
library(readxl)
library(ape)
library(phytools)
library(ggtree)
library(ggtext)
library(lubridate)

# set plotting theme
theme_set(
  theme_bw() + 
    theme(text = element_text(size = 10), 
          axis.text.x = element_text(angle = 0),
  strip.background = element_rect(fill="white",linetype='blank'))) 

# load transition edge script
source('code/get_transition_edges.R')

# set seed
set.seed(0)
# load in data
# facility and ST
facil_st <- read_csv('data/isolate_data.csv')
facil_st_mn <- facil_st %>% filter(State == 'MN')
write_csv(facil_st_mn, 'data/mn_isolate_data.csv')
# public isolate data
metadat_complete <- read_csv('data/public_isolate_metadata.csv')
# public isolate trees
tree_list <- read_rds('data/tree_list.rds')
# pairwise snv distances
kp_dists <- read_rds('data/kp_dists.rds')
ec_dists <- read_rds('data/eh_dists.rds')
# phylogenetic trees
tr_kp <- read.tree('data/kp_tr.tree')
tr_ec <- read.tree('data/eh_tr.tree')
# clinical factors
pt_feats_ct <- read_csv('data/pt_feats_ct.csv')
pt_feats_mn <- read_csv('data/pt_feats_mn.csv')
pt_feats_tn <- read_csv('data/pt_feats_tn.csv')
# shared facility exposures
pr_hosp_pairs_ct <- read_csv('data/pr_hosp_pairs_ct.csv')
pr_hosp_pairs_mn <- read_csv('data/pr_hosp_pairs_mn.csv')
pr_hosp_pairs_tn <- read_csv('data/pr_hosp_pairs_tn.csv')
# patient transfer networks
ct_trans_net_sub <- read_csv('data/trans_net_ct.csv')
mn_trans_net_sub <- read_csv('data/trans_net_mn.csv')
tn_trans_net_sub <- read_csv('data/trans_net_tn.csv')
# facility types
facil_types <- read_csv('data/facil_types.csv')
# facility counts
facil_type_counts <- read_csv('data/facil_type_counts.csv')
# facil_type_counts_wgs <- read_csv('data/facil_type_counts_wgs.csv') 
# facility w prior hospitals
facil_w <- read_csv('data/facil_w_st171_prior_hosps.csv')
# prior tn hospitals
prior_hosps_tn <- read_csv('data/pr_hosps_tn.csv')
# tn facil clutser
tn_facil_cluster <- read_csv('data/tn_facil_cluster.csv')
# dominant STs and colors
st_cts <- rev(sort(table(facil_st$ST)))
dominant_sts <- names(st_cts)[st_cts > 10 & names(st_cts) != 'Other species']
st_cols <- rep(NA, length(st_cts))
names(st_cols) <- names(st_cts)
st_cols['K. pneumoniae ST258'] <- 'coral4'
st_cols['K. pneumoniae ST307'] <- 'coral3'
st_cols['E. hormaechei ST171'] <- 'aquamarine4'
st_cols['E. hormaechei ST114'] <- 'aquamarine3'
st_cols[is.na(st_cols)] <- 'grey'
st_cols['Other'] <- 'grey'

Data summary

Number of facilities with CRE per state

facil_st %>% 
  group_by(State) %>% 
  summarize(n_facil = n_distinct(Facility_ID))
## # A tibble: 3 × 2
##   State n_facil
##   <chr>   <int>
## 1 CT         22
## 2 MN         60
## 3 TN         40

Facility type information (Table S1)

# number of each facility type per state
facil_type_counts
## # A tibble: 4 × 4
##   type     CT    MN    TN
##   <chr> <dbl> <dbl> <dbl>
## 1 ACH      30    50    92
## 2 LTACH     3     2    10
## 3 Other     7    96    48
## 4 SNF     224   366   309
# facility types with at least one WGS
facil_types %>% 
  group_by(state, type) %>% 
  summarize(count = n()) %>% 
  pivot_wider(names_from = state, values_from = count, values_fill = 0) 
## # A tibble: 5 × 4
##   type       CT    MN    TN
##   <chr>   <int> <int> <int>
## 1 ACH        19    23    31
## 2 Unknown     3    20     4
## 3 LTACH       0     2     4
## 4 Other       0     6     1
## 5 SNF         0     9     0

Species counts by state (Table 1)

state_dates <- facil_st %>% mutate(Cult_Date = gsub('-.*|.*/','', Cult_Date)) %>% 
  filter(!is.na(Cult_Date)) %>% 
  group_by(State, Cult_Date) %>% summarize() %>% 
  group_by(State) %>% summarize(min_cult_date = min(Cult_Date),
                                max_cult_date = max(Cult_Date))

facil_st %>% 
  mutate(State = factor(State, levels = c('MN','TN','CT'))) %>% 
  group_by(State, Species) %>% 
  count() %>% full_join(state_dates, .) %>% 
  suppressMessages()
## # A tibble: 6 × 5
##   State min_cult_date max_cult_date Species           n
##   <chr> <chr>         <chr>         <chr>         <int>
## 1 CT    2017          2018          E. hormaechei     8
## 2 CT    2017          2018          K. pneumoniae    44
## 3 MN    2012          2018          E. hormaechei    95
## 4 MN    2012          2018          K. pneumoniae   103
## 5 TN    2016          2017          E. hormaechei    58
## 6 TN    2016          2017          K. pneumoniae    78

Species counts by facility (Figure S1)

fs1 <- facil_st %>% filter(Species != 'Other') %>% 
    mutate(ST = factor(ST, levels = rev(names(st_cols))),
           Species = factor(Species, levels = c('K. pneumoniae','E. hormaechei','Other'))) %>% 
  ggplot(aes(x = Facility_ID, fill = ST)) + geom_bar(color = 'white') + facet_grid(Species~State, scales = 'free', space = 'free') +
  scale_fill_manual(values = st_cols, 
                    breaks = c('E. hormaechei ST171', 'E. hormaechei ST114',
                               'K. pneumoniae ST258', 'K. pneumoniae ST307', 
                               'Other'), 
                    labels = c('*E. hormaechei* ST171', '*E. hormaechei* ST114',
                               '*K. pneumoniae* ST258', '*K. pneumoniae* ST307', 
                               'Other'),
                    drop = FALSE) +
  labs(y = 'Number of isolates', fill = '', x = 'Facility') +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        strip.text.y = element_text(face = 'italic'), legend.text = element_markdown())

ggsave(plot = fs1, filename = 'figures/FigS1.png', width = 7, height = 3)

fs1

Public data (Figure S2)

public_loc_plot <- metadat_complete %>% 
  mutate(state = ifelse(is.na(state), 'Unknown', state),
         kpc = case_when(is.na(kpc) ~ 'Unknown',
                         kpc == 0 ~ 'No',
                         kpc == 1 ~ 'Yes'),
         kpc = factor(kpc, levels = c('Yes', 'No', 'Unknown'))) %>% 
  group_by(state) %>% 
  mutate(tot = n()) %>%
  ggplot(aes(x = reorder(state, tot), fill = kpc)) +
  geom_bar() +
  labs(x = 'State', y = 'Number of public isolates', fill = 'KPC\npresent') +
  scale_y_log10() +
  scale_fill_manual(values = c('aquamarine3', 'aquamarine4', 'darkgrey')) +
  coord_flip()

public_dates_plot <- metadat_complete %>% 
  ggplot(aes(x = year)) +
  geom_bar() +
  labs(x = 'Year', y = 'Number of public isolates')

plot_grid(public_dates_plot, public_loc_plot, ncol = 1, rel_heights = c(0.5, 1), align = 'vh', labels = 'AUTO') %>% 
  ggsave(filename = 'figures/FigS2.png', width = 7, height = 10)

Importation vs. HGT

kpc_bin <- metadat_complete %>% select(isolate_no, kpc) %>% drop_na() 
mn_bin <- metadat_complete %>% select(isolate_no, mn_bin) %>% drop_na()
tn_bin <- metadat_complete %>% select(isolate_no, tn_bin) %>% drop_na()
ct_bin <- metadat_complete %>% select(isolate_no, ct_bin) %>% drop_na()

genomes <- unique(c(kpc_bin$isolate_no, mn_bin$isolate_no, tn_bin$isolate_no, ct_bin$isolate_no))

transitions <- lapply(1:length(tree_list), function(x){
  
  st <- names(tree_list)[[x]]
  
  t <- midpoint.root(tree_list[[x]])
  t <- keep.tip(t, t$tip.label[t$tip.label %in% genomes])
  
  kpc_trans_edges = mn_trans_edges = tn_trans_edges = ct_trans_edges = NULL
  
  kpc_bin_sub <- kpc_bin %>% filter(isolate_no %in% t$tip.label)
  if(length(unique(kpc_bin_sub$kpc)) != 1){
    kpc_trans_edges <- transition_edge_pipeline(kpc_bin_sub, t, continuous = FALSE, ar_conf = 0.5)
  }
  mn_bin_sub <- mn_bin %>% filter(isolate_no %in% t$tip.label)
  if(length(unique(mn_bin_sub$mn_bin)) != 1){
    mn_trans_edges <- transition_edge_pipeline(mn_bin_sub, t, continuous = FALSE, ar_conf = 0.5)
  }
  tn_bin_sub <- tn_bin %>% filter(isolate_no %in% t$tip.label)
  if(length(unique(tn_bin_sub$tn_bin)) != 1){
    tn_trans_edges <- transition_edge_pipeline(tn_bin_sub, t, continuous = FALSE, ar_conf = 0.5)
  }
  ct_bin_sub <- ct_bin %>% filter(isolate_no %in% t$tip.label)
  if(length(unique(ct_bin_sub$ct_bin)) != 1){
    ct_trans_edges <- transition_edge_pipeline(ct_bin_sub, t, continuous = FALSE, ar_conf = 0.5)
  }
  
  if(is.null(kpc_trans_edges) & is.null(mn_trans_edges) & is.null(tn_trans_edges) & is.null(ct_trans_edges)) return(NULL)
  
  trans_sum <- paste0(
    ifelse(mn_trans_edges$mn_bin$trans_dir != 1, '', 'to_mn '), 
    ifelse(tn_trans_edges$tn_bin$trans_dir != 1, '', 'to_tn '), 
    ifelse(ct_trans_edges$ct_bin$trans_dir != 1, '', 'to_ct '), 
    ifelse(kpc_trans_edges$kpc$trans_dir != 1, '', 'gain_kpc ')
  )
  
  edge <- data.frame(t$edge, 
                  edge_num=1:nrow(t$edge), 
                  edge_val = trans_sum[1:(length(trans_sum)-1)]) %>% 
    mutate(edge_val = ifelse(edge_val == '', NA, edge_val))
  colnames(edge)=c("parent", "node", "edge_num","edge_val")
  
  trans_edge_nums <- sapply(unique(metadat_complete$isolate_no[grepl('^EIP',metadat_complete$sources) &
                                                                  metadat_complete$isolate_no %in% t$tip.label]),
                            function(i){
    trans <- NA
    child <- which(t$tip.label == i)
    edge_num <- NA
    while(is.na(trans) & child != (Ntip(t) + 1)){ # if find transition going backwards or get to root
      child_edge_info <- edge %>% filter(node == child)
      trans <- child_edge_info %>% select(edge_val) %>% deframe()
      child <- child_edge_info %>% select(parent) %>% deframe()
      if(!is.na(trans)){
        edge_num <- child_edge_info %>% select(edge_num) %>% deframe()
      }
    }
    edge_num
  })
  
  trans_edge_nums <- trans_edge_nums[!is.na(trans_edge_nums)] #recently added
  
  grps <- data.frame(id=names(trans_edge_nums),
                     class=trans_sum[trans_edge_nums],
                     cluster=trans_edge_nums) %>% 
    mutate(state = factor(gsub('_.*','',id), levels = c('MN','TN','CT')),
           ST = sapply(id, function(x){
             val <- unlist(unique(metadat_complete$ST[metadat_complete$isolate_no == x & !is.na(metadat_complete$ST)]))
             val <- ifelse(length(val) == 0, paste0('ST', unlist(unique(metadat_complete$mlst[metadat_complete$isolate_no == x & !is.na(metadat_complete$mlst)]))), val)
             val <- ifelse(length(val) == 0 | val == 'ST', st, val)
             ifelse(length(val) == 0, NA, val)
           }),
           species = sapply(id, function(x){
             val <- unlist(unique(metadat_complete$species[metadat_complete$isolate_no == x & !is.na(metadat_complete$species)]))
             ifelse(length(val) == 0, NA, val)
           }),
           ST_orig = ST,
           ST = ifelse(ST %in% c('E. hormaechei ST171','E. hormaechei ST114',
                                 'K. pneumoniae ST258','K. pneumoniae ST307', 
                                 'E. cloacae ST171', 'E. cloacae ST114', 
                                 'ST258', 'ST307', 'ST114', 'ST171'),
                       ST, 'Other'),
           tree = st
           )
  if(nrow(grps) == 0) return(NULL)
  grps
}) %>% 
  bind_rows() %>% suppressWarnings()

transitions <- transitions %>% 
  mutate(class = gsub('NA| ', '', class), 
         class = ifelse(is.na(class), '', class),
         hgt_type = ifelse(grepl('gain_kpc', class), 'Unknown', ''),
         hgt_type = ifelse(class == 'gain_kpc', 'In-state', hgt_type),
         class2 = ifelse(toupper(gsub('to_','',class)) != state & !grepl('gain_kpc', class), '', class),
         class2 = ifelse(grepl('gain_kpc', class2), 'Putative\nHGT', class2),
         class2 = ifelse(class2 == '', 'Unknown', class2),
         class2 = ifelse(grepl('to_', class2), 'Putative\nimportation', class2),
         class2 = factor(class2, levels = c('Putative\nimportation','Putative\nHGT','Unknown')),
         ST = ifelse(ST %in% c('ST258','ST307'), paste0('K. pneumoniae ', ST), ST),
         ST = ifelse(ST %in% c('ST171','ST114'), paste0('E. hormaechei ', ST), ST),
         ST = gsub('cloacae', 'hormaechei', ST),
         ST_orig = gsub('cloacae', 'hormaechei', ST_orig)
         )

keepers <- facil_st %>% 
  filter(!(duplicated(Patient_ID) & duplicated(ST))) %>% 
  pull(Sample_ID)

Trees used (Figure S3)

states <- metadat_complete %>% select(isolate_no, state) %>% deframe()
states <- unique(states[c(tree_list[[1]]$tip.label, tree_list[[4]]$tip.label, tree_list[[7]]$tip.label, tree_list[[12]]$tip.label)])
states <- states[!is.na(states)]
state_cols <- c('#e6194b', '#3cb44b', '#4363d8', '#f58231', '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff', '#9a6324', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080', '#ffffff', '#000000', '#ffe119')
names(state_cols) <- states

plot_tr_locs <- function(tr, og, title = NULL){
  tr <- root(tr, og)
  tr <- drop.tip(tr, og)
  states <- metadat_complete %>% select(isolate_no, state) %>% deframe()
  states <- states[tr$tip.label]
  kpc <- metadat_complete %>% select(isolate_no, kpc) %>% deframe()
  kpc <- kpc[tr$tip.label]
  no_info <- is.na(states) & is.na(kpc)
  states <- states[!no_info]
  kpc <- kpc[!no_info]
  kpc <- ifelse(kpc == 1, 'Present', 'Absent')
  tr <- keep.tip(tr, union(names(states), names(kpc)))
  states <- c(states, rep(NA, Nnode(tr)))
  kpc <- c(kpc, rep(NA, Nnode(tr)))
  ggtree(tr) + 
    geom_tippoint(aes(col = states, shape = kpc)) + 
    scale_color_manual(values = state_cols) + 
    scale_shape_manual(values = c('Present' = 16, 'Absent' = 1)) +
    guides(color = guide_legend(ncol = 2)) +
    labs(title = title, shape = 'KPC', col = 'State')
}

tr_st114 <- plot_tr_locs(tree_list[[7]], 'MN_CRE226', '*E. hormaechei* ST114') + 
  theme(legend.position = 'none', plot.title = ggtext::element_markdown())
tr_st171 <- plot_tr_locs(tree_list[[4]], 'MN_CRE226', '*E. hormaechei* ST171') + 
  theme(legend.position = 'none', plot.title = ggtext::element_markdown())
tr_st258 <- plot_tr_locs(tree_list[[1]], '573.1451', '*K. pneumoniae* ST258') + 
  theme(legend.position = 'none', plot.title = ggtext::element_markdown())
tr_st307 <- plot_tr_locs(tree_list[[12]], '573.13662', '*K. pneumoniae* ST307') + 
  theme(legend.position = 'none', plot.title = ggtext::element_markdown())
leg <- get_legend(plot_tr_locs(tree_list[[7]], 'MN_CRE226', 'E. hormaechei ST114'))

fs3 <- plot_grid(plot_grid(tr_st171, tr_st114, tr_st258, tr_st307), leg)

ggsave(plot = fs3, filename = 'figures/FigS3.png', width = 8, height = 8)

fs3

Importation vs. HGT (Figure 1)

clust_cols <- st_cols[transitions$ST]
names(clust_cols) <- paste0(transitions$state, transitions$ST, transitions$cluster)

clust_cols <- (c(clust_cols[clust_cols == "grey"],
                clust_cols[clust_cols == "coral3"],
                clust_cols[clust_cols == "coral4"],
                clust_cols[clust_cols == "aquamarine3"],
                clust_cols[clust_cols == "aquamarine4"]
                ))

transitions <- transitions %>% filter(id %in% keepers) %>% 
  mutate(ST = factor(ST, levels = (c("E. hormaechei ST171", "E. hormaechei ST114", 
                                     "K. pneumoniae ST258", "K. pneumoniae ST307", 
                                     'Other'))),
         state = factor(state, levels = c('CT', 'MN', 'TN'))) %>%
  arrange(ST)

f1 <- transitions %>% 
  ggplot(aes(x = class2, 
             fill = factor(paste0(state, ST, cluster), 
                           levels = unique(paste0(state, ST, cluster))))) + 
  geom_bar(color = 'white', position = position_stack(reverse = TRUE)) + 
  scale_fill_manual(values = clust_cols,
                    breaks = rev(names(clust_cols)[!duplicated(clust_cols)]),
                    labels = c('*E. hormaechei* ST171', '*E. hormaechei* ST114',
                               '*K. pneumoniae* ST258', '*K. pneumoniae* ST307',
                               'Other'),
                    drop = FALSE) +
  facet_grid(~state, scales = 'free', space = 'free') +
  labs(fill = 'ST cluster', x = 'Cluster type', y = 'Number of isolates') +
  theme(legend.text = ggtext::element_markdown()) 

ggsave(plot = f1, filename = 'figures/Fig1.png', width = 7, height = 4)

f1

transitions %>% 
  group_by(ST, class2) %>% 
  summarize(count = n()) %>% 
  group_by(ST) %>% 
  mutate(tot = sum(count),
         prop = count/tot)
## # A tibble: 12 × 5
## # Groups:   ST [5]
##    ST                  class2                  count   tot   prop
##    <fct>               <fct>                   <int> <int>  <dbl>
##  1 E. hormaechei ST171 "Putative\nimportation"    67    71 0.944 
##  2 E. hormaechei ST171 "Unknown"                   4    71 0.0563
##  3 E. hormaechei ST114 "Putative\nimportation"     9    23 0.391 
##  4 E. hormaechei ST114 "Putative\nHGT"            14    23 0.609 
##  5 K. pneumoniae ST258 "Putative\nimportation"    63    66 0.955 
##  6 K. pneumoniae ST258 "Unknown"                   3    66 0.0455
##  7 K. pneumoniae ST307 "Putative\nimportation"     1    20 0.05  
##  8 K. pneumoniae ST307 "Putative\nHGT"            17    20 0.85  
##  9 K. pneumoniae ST307 "Unknown"                   2    20 0.1   
## 10 Other               "Putative\nimportation"    36   114 0.316 
## 11 Other               "Putative\nHGT"            27   114 0.237 
## 12 Other               "Unknown"                  51   114 0.447

HGT locations (Figure S4)

fs4 <- transitions %>% 
  filter(id %in% keepers & hgt_type != '') %>%
  ggplot(aes(x = hgt_type, 
             fill = factor(paste0(state, ST, cluster), 
                           levels = unique(paste0(state, ST, cluster))))) + 
  geom_bar(color = 'white', position = position_stack(reverse = TRUE)) + 
  scale_fill_manual(values = clust_cols,
                    breaks = rev(names(clust_cols)[!duplicated(clust_cols)]),
                    labels = c('*E. hormaechei* ST171', '*E. hormaechei* ST114',
                               '*K. pneumoniae* ST258', '*K. pneumoniae* ST307',
                               'Other'),
                    drop = FALSE) +
  facet_grid(~state, scales = 'free', space = 'free') +
  labs(fill = 'ST cluster', x = 'HGT location', y = 'Number of isolates') +
  theme(legend.text = ggtext::element_markdown()) 

ggsave(plot = fs4, filename = 'figures/FigS4.png', width = 6, height = 3)

fs4

# earliest date for ST114 and ST307 
facil_st %>% 
  filter(ST %in% c('E. hormaechei ST114', 'K. pneumoniae ST307') & State == 'TN') %>% 
  group_by(State, ST) %>% 
  mutate(Cult_Date = mdy(Cult_Date)) %>% 
  slice_min(Cult_Date) %>% 
  select(State, ST, Cult_Date)
## # A tibble: 2 × 3
## # Groups:   State, ST [2]
##   State ST                  Cult_Date 
##   <chr> <chr>               <date>    
## 1 TN    E. hormaechei ST114 2016-06-23
## 2 TN    K. pneumoniae ST307 2016-03-25

Distances

# snv distances
kp_snv_dists <- get_pair_types(kp_dists, facil_st %>% select(Sample_ID, Facility_ID) %>% deframe(), pt = NULL) %>% 
  filter(!sapply(1:nrow(.), function(x) sort(c(as.character(isolate1[x]),as.character(isolate2[x])))) %>% t() %>% duplicated()) %>% 
  mutate(st1 = sapply(isolate1, function(x) facil_st$ST[facil_st$Sample_ID == x]),
         st2 = sapply(isolate2, function(x) facil_st$ST[facil_st$Sample_ID == x]),
         state1 = sapply(isolate1, function(x) facil_st$State[facil_st$Sample_ID == x]),
         state2 = sapply(isolate2, function(x) facil_st$State[facil_st$Sample_ID == x])) %>% 
  filter(st1 == st2 & !grepl('novel',st1)) %>% 
  mutate(state_pair = paste(state1, state2)) 

ec_snv_dists <- get_pair_types(ec_dists, facil_st %>% select(Sample_ID, Facility_ID) %>% deframe(), pt = NULL) %>% 
  filter(!sapply(1:nrow(.), function(x) sort(c(as.character(isolate1[x]),as.character(isolate2[x])))) %>% t() %>% duplicated()) %>% 
  mutate(st1 = sapply(isolate1, function(x) facil_st$ST[facil_st$Sample_ID == x]),
         st2 = sapply(isolate2, function(x) facil_st$ST[facil_st$Sample_ID == x]),
         state1 = sapply(isolate1, function(x) facil_st$State[facil_st$Sample_ID == x]),
         state2 = sapply(isolate2, function(x) facil_st$State[facil_st$Sample_ID == x])) %>% 
  filter(st1 == st2 & !grepl('novel',st1)) %>% 
  mutate(state_pair = paste(state1, state2)) 

snv_dat <- bind_rows(kp_snv_dists, ec_snv_dists) %>% filter(state1 == state2) %>% 
  mutate(state1 = factor(state1, levels = c('MN','TN','CT')),
         pair_type = ifelse(is.na(pair_type), 'Unknown', pair_type),
         under_thresh = ifelse(pairwise_dist <= 15, '(10,15]','(15,∞)'),
         under_thresh = ifelse(pairwise_dist <= 10, '(5,10]',under_thresh),
         under_thresh = ifelse(pairwise_dist <= 5, '[0,5]',under_thresh),
         under_thresh = factor(under_thresh, levels = c('[0,5]','(5,10]','(10,15]','(15,∞)')),
         ST_orig = st1,
         ST = ifelse(st1 %in% dominant_sts, st1, 'Other'),
         Species = gsub(' ST.*','',st1),
         ST = gsub('.* ','',ST))
# patristic distances
kp_pat <- cophenetic.phylo(tr_kp)
ec_pat <- cophenetic.phylo(tr_ec)

kp_pat_dists <- get_pair_types(kp_pat, facil_st %>% select(Sample_ID, Facility_ID) %>% deframe(), pt = NULL) %>% 
  filter(!sapply(1:nrow(.), function(x) sort(c(as.character(isolate1[x]),as.character(isolate2[x])))) %>% t() %>% duplicated()) %>% 
  mutate(st1 = sapply(isolate1, function(x) facil_st$ST[facil_st$Sample_ID == x]),
         st2 = sapply(isolate2, function(x) facil_st$ST[facil_st$Sample_ID == x]),
         state1 = sapply(isolate1, function(x) facil_st$State[facil_st$Sample_ID == x]),
         state2 = sapply(isolate2, function(x) facil_st$State[facil_st$Sample_ID == x])) %>% 
  filter(st1 == st2 & !grepl('novel',st1)) %>% 
  mutate(state_pair = paste(state1, state2)) 

ec_pat_dists <- get_pair_types(ec_pat, facil_st %>% select(Sample_ID, Facility_ID) %>% deframe(), pt = NULL) %>% 
  filter(!sapply(1:nrow(.), function(x) sort(c(as.character(isolate1[x]),as.character(isolate2[x])))) %>% t() %>% duplicated()) %>% 
  mutate(st1 = sapply(isolate1, function(x) facil_st$ST[facil_st$Sample_ID == x]),
         st2 = sapply(isolate2, function(x) facil_st$ST[facil_st$Sample_ID == x]),
         state1 = sapply(isolate1, function(x) facil_st$State[facil_st$Sample_ID == x]),
         state2 = sapply(isolate2, function(x) facil_st$State[facil_st$Sample_ID == x])) %>% 
  filter(st1 == st2 & !grepl('novel',st1)) %>% 
  mutate(state_pair = paste(state1, state2)) 

pat_dat <- bind_rows(kp_pat_dists, ec_pat_dists) %>% filter(state1 == state2) %>% 
  mutate(state1 = factor(state1, levels = c('MN','TN','CT')),
         pair_type = ifelse(is.na(pair_type), 'Unknown', pair_type),
         ST_orig = st1,
         ST = ifelse(st1 %in% dominant_sts, st1, 'Other'),
         Species = gsub(' ST.*','',st1),
         ST = gsub('.* ','',ST))

Intra- and inter-facility pairwise SNV distances

Fraction of intra-facility isolates over time (Figure S5)

snvdat_orig <- snv_dat[,1:6] %>% mutate(pairwise_dist = as.numeric(pairwise_dist))

frac_intras <- lapply(unique(c(snv_dat$st1, snv_dat$st2)), function(st){
  lapply(c('MN MN','TN TN','CT CT'), function(s){
      snvdat_orig_sub <- snvdat_orig[snv_dat$st1 == st & snv_dat$st2 == st & snv_dat$state_pair == s,]
      frac_intra <- NULL
      if(nrow(snvdat_orig_sub) != 0){
        frac_intra <- get_frac_intra(snvdat_orig_sub) %>% mutate(ST = st, state = gsub(' .*','', s))
      }
      frac_intra
  }) %>% bind_rows()
}) %>% bind_rows()

fs5 <- frac_intras %>% 
    mutate(ST = ifelse(ST %in% c('E. hormaechei ST114','E. hormaechei ST171', 
                                 'K. pneumoniae ST258', 'K. pneumoniae ST307'), ST, 
                       'Other'),
           ST = factor(ST, levels = c('E. hormaechei ST171', 'E. hormaechei ST114', 
                                      'K. pneumoniae ST258', 'K. pneumoniae ST307', 
                                      'Other'),
                       labels = c('*E. hormaechei* ST171', '*E. hormaechei* ST114', 
                                      '*K. pneumoniae* ST258', '*K. pneumoniae* ST307', 
                                      'Other'))) %>% 
  filter(frac_intra != 0 & ST != 'Other') %>% 
  ggplot(aes(x = pairwise_dist, y = frac_intra)) + 
  geom_bar(stat = "identity", fill = "blue", alpha = 0.5) + 
  labs(x = "Pairwise SNV distance", y = "Fraction of intra-facility pairs") + xlim(0, 50) + 
  facet_wrap(state~ST, scales = 'free') +
  theme(strip.text = element_markdown())

ggsave(plot = fs5, filename = 'figures/FigS5.png', width = 6, height = 5)

fs5

Pairwise SNV distances by pair type, state, and dominant STs (Figure 2)

f2 <- snv_dat %>% 
  mutate(pairwise_dist = ifelse(pairwise_dist == 0, 1, pairwise_dist),
         pair_type = factor(gsub(' pair', '', pair_type), levels = c('Intra-facility','Inter-facility')),
         Species = gsub('^', '\\*', Species),
         Species = gsub('$', '\\*', Species),
         state1 = factor(state1, levels = c('CT', 'MN', 'TN'))) %>% 
  filter(pair_type != 'Unknown' & ST != 'Other') %>% 
  ggplot(aes(x = pair_type, y = pairwise_dist)) + 
  geom_jitter(aes(fill = under_thresh, color = under_thresh),alpha = 0.5) +
  scale_color_manual(values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4",'(15,∞)' = "#FBB4AE")) +
  geom_violin(position = 'identity',alpha=0, color = 'azure4') +
  facet_grid(Species+ST~state1, scales = 'free') + scale_y_log10() +
  labs(x = 'Pair type',y = 'Pairwise SNV distance', color = 'SNV bin', fill = 'SNV bin') +
  theme(strip.text.y = element_markdown())

ggsave(plot = f2, filename = 'figures/Fig2.png', width = 6.5, height = 5)

f2

Pairwise SNV distances by pair type, state, and all STs (Figure S6)

fs6 <- snv_dat %>% filter(state1 == state2 & pair_type != 'Unknown') %>% 
  mutate(state1 = factor(state1, levels = c('CT','MN','TN')),
         pair_type = ifelse(is.na(pair_type), 'Unknown', pair_type),
         st1 = gsub('^', '\\*', st1),
         st1 = gsub('e ', 'e\\* ', st1),
         st1 = gsub('i ', 'i\\* ', st1)) %>% 
  ggplot(aes(x = pairwise_dist, y = st1, color = pair_type)) + 
  geom_vline(xintercept = 10, color = 'darkgrey') +
  geom_jitter(alpha=0.25) + facet_grid(~state1, scales = 'free') +
  scale_color_manual(values = c('lightslateblue','black')) +
  labs(y = '', x = 'Pairwise SNV distance', color = '') +
  theme(axis.text.y = element_markdown())

ggsave(plot = fs6, filename = 'figures/FigS6.png', width = 7, height = 5)

fs6

Patristic distance sensitivity analysis (Figure S7)

fs7 <- left_join(snv_dat %>% rename(pd_snv = pairwise_dist), pat_dat %>% rename(pd_pat = pairwise_dist)) %>% 
  mutate(state1 = factor(state1, levels = c('CT', 'MN', 'TN')),
         pair_type = factor(gsub(' pair', '', pair_type), levels = c('Intra-facility','Inter-facility')),
         Species = gsub('^', '\\*', Species),
         Species = gsub('$', '\\*', Species)) %>% 
  filter(pair_type != 'Unknown' & ST != 'Other') %>% 
  ggplot(aes(x = pair_type, y = pd_pat)) + geom_jitter(aes(fill = under_thresh, color = under_thresh),alpha = 0.5) +
  scale_color_manual(values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4",'(15,∞)' = "#FBB4AE")) +
  geom_violin(position = 'identity',alpha=0, color = 'azure4') +
  facet_grid(Species+ST~state1, scales = 'free') + scale_y_log10() +
  labs(x = 'Pair type',y = 'Pairwise patristic distance', color = 'SNV bin', fill = 'SNV bin') +
  theme(strip.text.y = element_markdown())

ggsave(plot = fs7, filename = 'figures/FigS7.png', width = 6.5, height = 5)

fs7

Intra-facility analysis (Figure S8)

min_dat <- lapply(unique(c(as.character(snv_dat$isolate1), as.character(snv_dat$isolate2))), function(i){
  snv_dat %>% filter(pair_type == 'Intra-facility pair' & (isolate1 == i | isolate2 == i)) %>% 
  group_by(loc1, ST_orig, state1) %>% 
  summarize(dist_min = min(pairwise_dist))
}) %>% bind_rows() %>% suppressMessages() %>% suppressWarnings()
fs8 <- min_dat %>% 
    mutate(under_thresh = ifelse(dist_min <= 15, '(10,15]','(15,∞)'),
         under_thresh = ifelse(dist_min <= 10, '(5,10]',under_thresh),
         under_thresh = ifelse(dist_min <= 5, '[0,5]',under_thresh),
         under_thresh = factor(under_thresh, levels = rev(c('[0,5]','(5,10]','(10,15]','(15,∞)'))),
         ST = ifelse(ST_orig %in% c('K. pneumoniae ST258','E. hormaechei ST171', 'K. pneumoniae ST307','E. hormaechei ST114'), ST_orig, 'Other'),
         ST = factor(ST, levels = c('E. hormaechei ST171', 'E. hormaechei ST114', 'K. pneumoniae ST258', 'K. pneumoniae ST307', 'Other'),
                     labels = c('*E. hormaechei*<br>ST171', '*E. hormaechei*<br>ST114', 
                                '*K. pneumoniae*<br>ST258', '*K. pneumoniae*<br>ST307', 'Other')),
         state1 = factor(state1, levels = c('CT', 'MN', 'TN'))) %>% 
  ggplot(aes(x = reorder(loc1, loc1, function(x)-length(x)), fill = under_thresh)) + 
  geom_bar() +
       scale_fill_manual(values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4",'(15,∞)' = "#FBB4AE")) +
         labs(y = "Number of isolates", x = "Facility", fill = 'SNV bin of closest\nintra-facility isolate') +
  facet_grid(ST~state1, space = 'free', scales = 'free_x') +
  theme(axis.text.x = element_blank(), strip.text.y = element_markdown()) + 
  scale_y_continuous(breaks = scales::pretty_breaks())

ggsave(plot = fs8, filename = 'figures/FigS8.png', width = 7, height = 5)

fs8

Clinical factor analysis (Table S3)

fisher <- function(a,b,c,d){
  data <- matrix(c(a,b,c,d),ncol=2)
  c(p = fisher.test(data)$p.value,
    OR = fisher.test(data)$estimate)
}
intra_pairs_ct <- snv_dat %>% filter(loc1 == loc2 & st1 == st2 & state1 == 'CT')
intra_pairs_mn <- snv_dat %>% filter(loc1 == loc2 & st1 == st2 & state1 == 'MN')
intra_pairs_tn <- snv_dat %>% filter(loc1 == loc2 & st1 == st2 & state1 == 'TN')

pt_info_ct <- suppressMessages(lapply(1:nrow(intra_pairs_ct), function(x){
  sub <- pt_feats_ct %>% filter(Sample_ID %in% unlist(c(intra_pairs_ct[x,c('isolate1','isolate2')]))) %>% 
  select_if(colSums(is.na(.)) == 0)
  info <- bind_cols(intra_pairs_ct[x,])%>% mutate(shared_clin = FALSE)
  if(ncol(sub) > 2 & nrow(sub) > 1){
    info <- info %>% mutate(shared_clin = TRUE)
  }
  info
})) %>% bind_rows() 

pt_info_mn <- suppressMessages(lapply(1:nrow(intra_pairs_mn), function(x){
  sub <- pt_feats_mn %>% filter(Sample_ID %in% unlist(c(intra_pairs_mn[x,c('isolate1','isolate2')]))) %>% 
  select_if(colSums(is.na(.)) == 0)
  info <- bind_cols(intra_pairs_mn[x,])%>% mutate(shared_clin = FALSE)
  if(ncol(sub) > 2 & nrow(sub) > 1){
    info <- info %>% mutate(shared_clin = TRUE, shared_comorb = paste0(names(sub)[!names(sub) == 'Sample_ID'], collapse = ','))
  }
  info
})) %>% bind_rows() 

pt_info_tn <- suppressMessages(lapply(1:nrow(intra_pairs_tn), function(x){
  sub <- pt_feats_tn %>% filter(Sample_ID %in% unlist(c(intra_pairs_tn[x,c('isolate1','isolate2')]))) %>% 
    unique() %>% 
    select_if(c(TRUE, colSums(.[,2:ncol(.)]) != 0))
  info <- bind_cols(intra_pairs_tn[x,]) %>% mutate(shared_clin = FALSE)
  if(ncol(sub) > 2 & nrow(sub) > 1){
    info <- info %>% mutate(shared_clin = TRUE, shared_comorb = paste0(names(sub)[!names(sub) == 'Sample_ID'], collapse = ','))
  }
  info
})) %>% bind_rows() 

pt_info_comb <- bind_rows(pt_info_mn, pt_info_tn, pt_info_ct) %>%
  mutate(`pairwise_dist <= 10` = pairwise_dist <= 10) %>% 
  select(state1, st1, shared_clin, `pairwise_dist <= 10`)

pt_comorb_compare <- pt_info_comb %>% 
  group_by(state1, st1, shared_clin, `pairwise_dist <= 10`) %>% summarize(count = n()) %>% 
  mutate(sharedclin_pdlt10 = factor(paste0(ifelse(shared_clin, 'shared', 'not shared'), ', ', 
                                           ifelse(`pairwise_dist <= 10`, '≤10', '>10')),
                                    levels = c('shared, ≤10', 'shared, >10', 'not shared, ≤10', 'not shared, >10'))) %>% 
  ungroup() %>% 
  arrange(sharedclin_pdlt10) %>% 
  select(-c(shared_clin, `pairwise_dist <= 10`)) %>% 
  pivot_wider(names_from = sharedclin_pdlt10, values_from = count, values_fill = 0) %>% 
  group_by(state1, st1) %>% 
  rowwise() %>% 
  mutate(fe_p = fisher(`shared, ≤10`, `shared, >10`, `not shared, ≤10`, `not shared, >10`)[[1]]) %>% 
  arrange(state1, st1) %>% 
  mutate(frac_shared_lt10 = `shared, ≤10`/(`shared, ≤10` + `not shared, ≤10`),
         frac_shared_gt10 = `shared, >10`/(`shared, >10` + `not shared, >10`)) 

pt_comorb_compare
## # A tibble: 17 × 9
## # Rowwise:  state1, st1
##    state1 st1            share…¹ share…² not s…³ not s…⁴  fe_p frac_s…⁵ frac_s…⁶
##    <fct>  <chr>            <int>   <int>   <int>   <int> <dbl>    <dbl>    <dbl>
##  1 MN     E. hormaechei…       1       0       0       2 0.333   1        0     
##  2 MN     E. hormaechei…       6      17      80     212 1       0.0698   0.0742
##  3 MN     E. hormaechei…       0       0       1       0 1       0      NaN     
##  4 MN     K. pneumoniae…       0       0       1       0 1       0      NaN     
##  5 MN     K. pneumoniae…       0       0       1       0 1       0      NaN     
##  6 MN     K. pneumoniae…       0       3      25      99 1       0        0.0294
##  7 TN     E. hormaechei…       1       9       0       0 1       1        1     
##  8 TN     E. hormaechei…       0       3       0       0 1     NaN        1     
##  9 TN     E. hormaechei…       1       0       0       0 1       1      NaN     
## 10 TN     E. hormaechei…       0       1       0       0 1     NaN        1     
## 11 TN     E. hormaechei…       1       0       0       0 1       1      NaN     
## 12 TN     E. hormaechei…       0       1       0       0 1     NaN        1     
## 13 TN     K. pneumoniae…       1       0       0       0 1       1      NaN     
## 14 TN     K. pneumoniae…       4       8       1       3 1       0.8      0.727 
## 15 TN     K. pneumoniae…       5       7       3       0 0.200   0.625    1     
## 16 CT     K. pneumoniae…       0       0       0       1 1     NaN        0     
## 17 CT     K. pneumoniae…       1       5       0       5 1       1        0.5   
## # … with abbreviated variable names ¹​`shared, ≤10`, ²​`shared, >10`,
## #   ³​`not shared, ≤10`, ⁴​`not shared, >10`, ⁵​frac_shared_lt10,
## #   ⁶​frac_shared_gt10

Inter-facility analysis

Aggregate patient transfer (Figure S9)

mn_pt_trans <- get_patient_flow(mn_trans_net_sub, 
                                unique(facil_st$Facility_ID)[unique(facil_st$Facility_ID) %in% 
                                                        c(mn_trans_net_sub$source_facil,
                                                          mn_trans_net_sub$dest_facil)]) %>% 
  filter(!duplicated(.))
tn_pt_trans <- get_patient_flow(tn_trans_net_sub, unique(facil_st$Facility_ID)[unique(facil_st$Facility_ID) %in% 
                                                        c(tn_trans_net_sub$source_facil,
                                                          tn_trans_net_sub$dest_facil)]) %>% 
  filter(!duplicated(.))
ct_pt_trans <- get_patient_flow(ct_trans_net_sub, unique(facil_st$Facility_ID)[unique(facil_st$Facility_ID) %in% 
                                                        c(ct_trans_net_sub$source_facil,
                                                          ct_trans_net_sub$dest_facil)]) %>% 
  filter(!duplicated(.))

sts <- c('ST171','ST258','ST114','ST307')

snv_by_st <- suppressWarnings(lapply(sts, function(x){
  snvdat_sub <- snv_dat %>% filter(ST == x) %>% mutate(pairwise_dist = as.numeric(pairwise_dist))
  snvdat_sub <- snvdat_sub[,1:6] 
    mn_snv_by_st_sub <- summarize_pairs(snvdat_sub) %>% mutate(ST = x)
})) %>% bind_rows()

pt_snv <- bind_rows(bind_cols(state = 'MN', mn_pt_trans), 
                    bind_cols(state = 'TN', tn_pt_trans),
                    bind_cols(state = 'CT', ct_pt_trans)) %>% inner_join(snv_by_st) %>%
  mutate(under_thresh = ifelse(dist_min <= 15, '(10,15]','(15,∞)'),
         under_thresh = ifelse(dist_min <= 10, '(5,10]',under_thresh),
         under_thresh = ifelse(dist_min <= 5, '[0,5]',under_thresh),
         under_thresh = factor(under_thresh, levels = c('[0,5]','(5,10]','(10,15]','(15,∞)')),
         species = ifelse(ST %in% c('ST258','ST307'), 'K. pneumoniae','E. hormaechei'))
pvals <- pt_snv %>% 
  mutate(leq_10_bool = ifelse(dist_min <= 10, 'close','not_close')) %>% 
  select(state, ST, leq_10_bool, sum_pt_trans_metric) %>% group_by(state, ST, leq_10_bool) %>% 
  summarize(sum_pt_trans_metric = list(sum_pt_trans_metric)) %>% 
  spread(leq_10_bool, sum_pt_trans_metric) %>% 
  group_by(state, ST) %>% 
  filter(!(is.null(unlist(close)) | is.null(unlist(not_close)))) %>% 
  mutate(close_median=median(unlist(close)),
         not_close_median=median(unlist(not_close)),
         p=wilcox.test(unlist(close),unlist(not_close))$p.value) %>% 
  mutate(species = ifelse(ST %in% c('ST258','ST307'), '*K. pneumoniae*','*E. hormaechei*'))

fs9 <- pt_snv %>% 
  mutate(species = gsub('^', '\\*',species),
         species = gsub('$', '\\*',species),
         sum_pt_trans_metric = sum_pt_trans_metric + min(sum_pt_trans_metric[sum_pt_trans_metric != 0]/2), # no infinite values in transformation
         dist_min = dist_min + min(dist_min[dist_min != 0]/2)) %>%  # no infinite values in transformation
  ggplot(aes(x = sum_pt_trans_metric, y = dist_min)) + 
  geom_point(aes(color = under_thresh)) + scale_y_log10() + scale_x_log10() +
  scale_color_manual(
    values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4",'(15,∞)' = "#FBB4AE")
    ) +
  geom_text(data = pvals, aes(x = 5e-5, y = 1, label = paste0('p=',signif(p, 1))), hjust = 0) +
  facet_grid(species+ST~state) + 
  labs(x = 'Sum(patient flow between facility pair)', y = 'Minimum pairwise SNV distance', color = 'SNV bin') +
  theme(strip.text.y = element_markdown())

ggsave(plot = fs9, filename = 'figures/FigS9.png', width = 7, height = 4)

fs9

Prior hospital

Individual healthcare exposures (Table 2)

pr_summary <- bind_rows(pr_hosp_pairs_ct, pr_hosp_pairs_mn, pr_hosp_pairs_tn) %>%
  left_join(snv_dat %>% filter(loc1 != loc2)) %>% 
  mutate(st1 = ifelse(st1 %in% c("E. hormaechei ST114", "E. hormaechei ST171", "K. pneumoniae ST258", 
"K. pneumoniae ST307"), st1, 'Other')) %>% 
  group_by(state1, st1, same_pr_hosp, pairwise_dist <= 10) %>% 
  summarize(total_l10 = n()) %>% 
  mutate(same_pr_hosp = ifelse(same_pr_hosp, 'pr_hosp','no_pr_hosp'),
         `pairwise_dist <= 10` = ifelse(`pairwise_dist <= 10`, 'lt10','gt10')) %>% 
  pivot_wider(names_from = c(same_pr_hosp, `pairwise_dist <= 10`), values_from = c(total_l10)) %>% 
  rename(state = state1, st = st1) %>% 
  select(state, st, pr_hosp_lt10, no_pr_hosp_lt10, pr_hosp_gt10, no_pr_hosp_gt10) %>% 
  replace(is.na(.), 0)

pr_summary$fisher_p <- sapply(1:nrow(pr_summary), function(x){
  exact2x2(matrix(unlist(c(pr_summary[x,3:6])), nrow = 2))$p.value %>% signif(digits = 1)
})

pr_summary <- pr_summary %>% 
  mutate(pr_hosp_lt10_pct = round(pr_hosp_lt10/(no_pr_hosp_lt10 + pr_hosp_lt10), 1)*100, .after = no_pr_hosp_lt10) %>% 
  mutate(pr_hosp_gt10_pct = round(pr_hosp_gt10/(no_pr_hosp_gt10 + pr_hosp_gt10), 1)*100, .after = no_pr_hosp_gt10) %>% 
  mutate('Pairwise SNV distance ≤ 10\nwith shared facility exposure (%)' =
           paste0(pr_hosp_lt10, '/', (pr_hosp_lt10+no_pr_hosp_lt10), ' (',round(pr_hosp_lt10/(pr_hosp_lt10+no_pr_hosp_lt10),2)*100,')'),
         'Pairwise SNV distance > 10\nwith shared facility exposure (%)' =
           paste0(pr_hosp_gt10, '/', (pr_hosp_gt10+no_pr_hosp_gt10), ' (',round(pr_hosp_gt10/(pr_hosp_gt10+no_pr_hosp_gt10),2)*100,')')) %>% 
  select(c("state", "st", 
           "Pairwise SNV distance ≤ 10\nwith shared facility exposure (%)", 
           "Pairwise SNV distance > 10\nwith shared facility exposure (%)",
           "fisher_p"
)) %>% 
  rename(State = state, ST = st, "Fisher's exact p" = fisher_p)

pr_summary
## # A tibble: 13 × 5
## # Groups:   State, ST [13]
##    State ST                  Pairwise SNV distance ≤ 10\nwith …¹ Pairw…² Fishe…³
##    <fct> <chr>               <chr>                               <chr>     <dbl>
##  1 MN    E. hormaechei ST114 3/9 (33)                            1/43 (…    1e-2
##  2 MN    E. hormaechei ST171 23/430 (5)                          62/195…    3e-2
##  3 MN    K. pneumoniae ST258 10/62 (16)                          23/195…    2e-8
##  4 MN    K. pneumoniae ST307 3/3 (100)                           0/7 (0)    8e-3
##  5 MN    Other               2/4 (50)                            0/6 (0)    1e-1
##  6 TN    E. hormaechei ST114 4/16 (25)                           24/79 …    8e-1
##  7 TN    E. hormaechei ST171 1/4 (25)                            1/21 (…    3e-1
##  8 TN    K. pneumoniae ST258 7/21 (33)                           7/369 …    1e-6
##  9 TN    K. pneumoniae ST307 5/17 (29)                           30/104…    1e+0
## 10 TN    Other               7/13 (54)                           4/18 (…    1e-1
## 11 CT    K. pneumoniae ST258 1/2 (50)                            1/197 …    2e-2
## 12 CT    K. pneumoniae ST307 0/1 (0)                             0/2 (0)    1e+0
## 13 CT    Other               0/3 (0)                             0/4 (0)    1e+0
## # … with abbreviated variable names
## #   ¹​`Pairwise SNV distance ≤ 10\nwith shared facility exposure (%)`,
## #   ²​`Pairwise SNV distance > 10\nwith shared facility exposure (%)`,
## #   ³​`Fisher's exact p`

Individual graph (Figure S10)

fs10 <- graph %>% ggraph(layout = 'kk') + 
  geom_edge_parallel(aes(color = st1, linetype = same_pr_hosp, alpha = same_pr_hosp),sep = unit(0.4, "mm")) + 
  geom_node_point(size = 0.5) + 
  scale_edge_linetype_manual(values = c('dotted','solid')) +
  facet_nodes(~state, nrow = 3) +
  scale_edge_alpha_manual(values = c(0.5, 1)) + 
  scale_edge_color_manual(values = c('*E. hormaechei* ST171'="aquamarine4", '*E. hormaechei* ST114'="aquamarine3",
                                     '*K. pneumoniae* ST258'="coral4", '*K. pneumoniae* ST307'="coral3",
                                     'Other'='grey'), 
                          breaks = c('*E. hormaechei* ST171', '*E. hormaechei* ST114',
                                      '*K. pneumoniae* ST258', '*K. pneumoniae* ST307', 'Other'), drop = FALSE) +
  labs(edge_color = 'Isolate pair ≤ 10 SNVs', edge_linetype = 'Patients have\nshared facility exposure',
       edge_alpha = 'Patients have\nshared facility exposure') +
  theme_graph() + theme(legend.text = element_markdown())

ggsave(plot = fs10, filename = 'figures/FigS10.png', width = 5, height = 6)

fs10

Comparison of individual to aggregate (Figure S11)

fs11 <- full_join(pr_summary %>% mutate(ST_orig = ST, ST = gsub('.* ','',ST)) %>% rename('ind_p' = "Fisher's exact p"), 
          pvals %>% rename(State = state, 'agg_p' = 'p')) %>% 
  mutate(ST_orig = gsub('^', '\\*', ST_orig),
         ST_orig = gsub('e ', 'e\\* ', ST_orig),
         ST_orig = gsub('i ', 'i\\* ', ST_orig)) %>% 
  filter(ST != 'Other') %>% 
  ggplot(aes(x = ind_p, y = agg_p, col = State, shape = ST_orig)) +
  geom_point(size = 3) +
  scale_color_manual(values = c(CT = "#46f0f0", MN = "#3cb44b", TN = "#4363d8")) +
  geom_hline(yintercept = 0.05) +
  geom_vline(xintercept = 0.05) +
  labs(x = 'Patient transfer p-value from\nindividual-level analysis', 
       y = 'Patient transfer p-value from\naggregate-level analysis',
       shape = 'ST') +
    theme(legend.text = element_markdown())

ggsave(plot = fs11, filename = 'figures/FigS11.png', width = 5, height = 3)

fs11

MN maps (Figure 3)

# connecting each patient to their closest isolate with a subsequent collection date
edge_dat <- lapply(unique(c(as.character(snv_dat$isolate1), as.character(snv_dat$isolate2))), function(i){
  cultdate <- facil_st$Cult_Date[facil_st$Sample_ID == i]
  subset_dat <- snv_dat %>% filter(pair_type == 'Inter-facility pair' & (isolate1 == i | isolate2 == i) & state1 == 'MN')
  dates <- sapply(unique(c(as.character(subset_dat$isolate1), as.character(subset_dat$isolate2))), function(j){
    as.character(facil_st$Cult_Date[facil_st$Sample_ID == j])
  }) 
  dat <- NULL
  if(length(dates) > 0){
    keepers <- names(dates)[as.Date(dates) > cultdate]
    dat <- subset_dat %>% 
      filter(isolate1 %in% keepers | isolate2 %in% keepers) %>% 
      filter(pairwise_dist == min(pairwise_dist)) %>% 
      mutate(earlier = as.character(ifelse(isolate2 == i, 'loc2','loc1')),
             date1 = dates[as.character(isolate1)],
             date2 = dates[as.character(isolate2)],
             datediff = abs(as.numeric(as.Date(dates[as.character(isolate1)]) - as.Date(dates[as.character(isolate2)]))))
  }
}) %>% bind_rows() 

edges_for_plot <- edge_dat %>% 
  filter(pairwise_dist <= 15) %>% 
  left_join(facil_st %>% select(Sample_ID, Facility_ID, longitude, latitude), 
            by = c('isolate1' = 'Sample_ID', 'loc1' = 'Facility_ID')) %>%
  rename(x = longitude, y = latitude) %>%
  left_join(facil_st %>% select(Sample_ID, Facility_ID, longitude, latitude), 
            by = c('isolate2' = 'Sample_ID', 'loc2' = 'Facility_ID')) %>%
  rename(xend = longitude, yend = latitude) %>% 
  filter(!(is.na(x) | is.na(xend)) & ST %in% c('ST171','ST258')) %>% 
  mutate(dist = abs(x - xend)^2 + abs(y - yend)^2)

facilities <- facil_st %>% filter(State == 'MN' & ST %in% c('E. hormaechei ST171', 'K. pneumoniae ST258')) %>% 
  select(Facility_ID, latitude, longitude) %>% 
  filter(!duplicated(.) & !is.na(latitude)) 
edges_for_plot_ec <- edges_for_plot %>% filter(ST == 'ST171') %>% 
  mutate(under_thresh = factor(under_thresh, levels = c("[0,5]", "(5,10]", "(10,15]")))

whole_state_ec <- facil_st %>% 
  mutate(ST = gsub('cloacae', 'hormaechei', ST)) %>% 
  filter(ST %in% c('E. hormaechei ST171')) %>%
  mutate(ST = gsub('.* ST', 'ST', ST)) %>% 
  group_by(Facility_ID, latitude, longitude, ST)  %>% tally() %>%
  ggplot(aes(x=longitude, y=latitude), color = "grey99") +
    geom_curve(aes(x = xend, y = yend, xend = x, yend = y, color = under_thresh),     # draw edges as arcs
                 alpha = 1,
             data = edges_for_plot_ec, curvature = 0.33) + 
  scale_color_manual(values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4")) +
  geom_point(data = facilities, aes(x = longitude, y = latitude), alpha = 0.25, size = 1) +
  geom_point(aes(size=n), alpha = 1, color = 'white') +
  geom_point(aes(size=n), fill = adjustcolor(st_cols[c('E. hormaechei ST171')], alpha.f = 0.5), pch = 21) + 
  scale_size_continuous(breaks = seq(0, 20, 5), limits = c(0,25),
                        labels = c(0,5,10,15,20),
                        name = 'Number of\nisolates') +
  coord_quickmap() +
  # geom_rect(aes(xmin = -986875.33, xmax = -986875.165, ymin = 896963.81, ymax = 896964.1),
  geom_rect(aes(xmin = -986875.33+0.58, xmax = -986875.165+0.58, ymin = 896963.81+0.145, ymax = 896964.1+0.145),
  fill = "transparent", color = "lightgrey", size = 1) +
  labs(color = 'SNV bin', size = 'Number of\nisolates') +
  guides(size = guide_legend(override.aes = list(alpha = c(0.25, 1, 1, 1, 1), fill = 'black'))) +
  theme_void() 
whole_state_ec

zoom_state_ec <- whole_state_ec + 
  # coord_quickmap(xlim=c(-986875.33+0.007, -986875.165-0.007), ylim=c(896963.81+0.012, 896964.1-0.012)) +
  coord_quickmap(xlim=c(-986875.33+0.58+0.007, -986875.165+0.58-0.007), ylim=c(896963.81+0.145+0.013, 896964.1+0.145-0.013)) +
  theme(legend.position = "none") + 
  geom_text(aes(label = ifelse(Facility_ID == 'F38', 'W', NA)),hjust=1.5, vjust=2)
zoom_state_ec

edges_for_plot_kp <- edges_for_plot %>% filter(ST == 'ST258')
whole_state_kp <- facil_st %>% 
  filter(ST %in% c('K. pneumoniae ST258')) %>%
  mutate(ST = gsub('.* ST', 'ST', ST)) %>% 
  group_by(Facility_ID, latitude, longitude, ST)  %>% tally() %>%
  ggplot(aes(x=longitude, y=latitude), color = "grey99") +
    geom_curve(aes(x = xend, y = yend, xend = x, yend = y, color = under_thresh),     # draw edges as arcs
                 alpha = 1,
             data = edges_for_plot_kp, curvature = 0.33) + 
  scale_color_manual(values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4",'(15,∞)' = "#FBB4AE")) +
  geom_point(data = facilities, aes(x = longitude, y = latitude), size = 1, alpha = 0.25) +
  geom_point(aes(size=n), alpha = 1, color = 'white') +
  geom_point(aes(size=n), fill = adjustcolor(st_cols['K. pneumoniae ST258'], alpha.f = 0.5), pch = 21) + 
  scale_size_continuous(breaks = seq(5, 20, 5), limits = c(1,25)) +
  coord_quickmap() +
  # geom_rect(aes(xmin = -986875.33, xmax = -986875.165, ymin = 896963.81, ymax = 896964.1),
  geom_rect(aes(xmin = -986875.33+0.58, xmax = -986875.165+0.58, ymin = 896963.81+0.145, ymax = 896964.1+0.145),
               fill = "transparent", color = "lightgrey", size = 1) +
  labs(size = 'Number of\nisolates', color = 'SNV bin') +
  theme_void() 
whole_state_kp

zoom_state_kp <- whole_state_kp + 
  # coord_quickmap(xlim=c(-986875.33+0.007, -986875.165-0.007), ylim=c(896963.81+0.012, 896964.1-0.012)) +
  coord_quickmap(xlim=c(-986875.33+0.58+0.007, -986875.165+0.58-0.007), ylim=c(896963.81+0.145+0.013, 896964.1+0.145-0.013)) +
  theme(legend.position = "none") +
   geom_text(aes(label = ifelse(Facility_ID == 'F38', 'W', NA)),hjust=1.5, vjust=2)
zoom_state_kp

# grob plotting function to plot panel colors
plot_panel_cols <- function(p, fills){
  gt <- ggplot_gtable(ggplot_build(p))
  strip_both <- which(grepl('strip-', gt$layout$name))
  k <- 1
  for (i in strip_both) {
    j <- which(grepl('rect', gt$grobs[[i]]$grobs[[1]]$childrenOrder))
    gt$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
    k <- k+1
  }
  ggplotify::as.ggplot(gt)
}
bp <- edge_dat %>% pivot_longer(cols = c('loc1','loc2'), names_to = 'fname', values_to = 'loc') %>% 
  filter(ST %in% c('ST171','ST258') & under_thresh != '(15,∞)') %>%
  mutate(under_thresh = factor(under_thresh, levels = rev(levels(under_thresh))),
         ST = factor(ifelse(ST == 'ST171', '*E. hormaechei* ST171', '*K. pneumoniae* ST258'),
                     levels = c('*E. hormaechei* ST171', '*K. pneumoniae* ST258'))) %>% 
  filter(fname == earlier) %>% 
  select(loc, ST, under_thresh) %>%
  ggplot(aes(x = loc)) + 
    geom_bar(aes(col = ifelse(loc == 'F38', 'yes','no'))) +
  geom_bar(aes(fill = under_thresh)) + 
  facet_grid(ST~.) +
    scale_fill_manual(values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4",'(15,∞)' = "#FBB4AE")) +
  scale_color_manual(values = c('yes' = 'black','no' = NA)) +
  labs(x = 'Facility', y = 'Number of isolates closely related\nto an isolate from another facility', fill = 'SNV bin')

bp_labeled <- bp + scale_x_discrete(labels = ifelse(ggplot_build(bp)$layout$panel_params[[1]]$x$breaks == 'F38', 'W','')) +
  theme_classic() +
  theme(legend.position = 'none', #axis.ticks.x = element_blank(), 
        text = element_text(size = 10), 
        axis.text.x = element_text(angle = 0),
        strip.background = element_rect(fill="white",linetype='blank'),
        strip.text.y = element_markdown())
bp_col <- plot_panel_cols(bp_labeled, adjustcolor(st_cols[c('E. hormaechei ST171','K. pneumoniae ST258')], alpha.f = 0.5)) 
leg <- ggpubr::get_legend(whole_state_ec +
                            theme(legend.box.margin = margin(0, 0, 0, 50))
)
f3 <- plot_grid(leg, NULL,
          plot_grid(whole_state_ec + theme(legend.position = 'none'), zoom_state_ec,
                    NULL, NULL,
                    whole_state_kp + theme(legend.position = 'none'), zoom_state_kp,
                    NULL, NULL,
                    nrow = 4, 
                    rel_heights = c(1,0.05,1,0.1,1,0.05,1,0.1)),
          plot_grid(NULL, bp_col, NULL, ncol = 1, rel_heights = c(0.1,0.8,0.05)), nrow = 1,
          rel_widths = c(0.2, 0.05, 1, 0.8) 
)

ggsave(plot = f3, filename = 'figures/Fig3.png', width = 10, height = 4)

f3

# prior facilities for facility w/ st171 isolates 
facil_w %>% 
  mutate(n_patients_tot = n_distinct(Sample_ID)) %>% 
  filter(!is.na(prior_hosp_alias)) %>% 
  unique() %>% 
  group_by(prior_hosp_alias, n_patients_tot) %>% 
  summarize(n_patients = n()) %>% 
  ungroup() %>% 
  mutate(prop_patients = n_patients/n_patients_tot) %>% 
  arrange(-n_patients) 
## # A tibble: 11 × 4
##    prior_hosp_alias n_patients_tot n_patients prop_patients
##               <dbl>          <int>      <int>         <dbl>
##  1                4             24         10        0.417 
##  2                2             24          2        0.0833
##  3                1             24          1        0.0417
##  4                3             24          1        0.0417
##  5                5             24          1        0.0417
##  6                6             24          1        0.0417
##  7                7             24          1        0.0417
##  8                8             24          1        0.0417
##  9                9             24          1        0.0417
## 10               10             24          1        0.0417
## 11               11             24          1        0.0417

Comparison of ST171 and ST258 inter-facility transmission patterns (Figure S12)

## for each facility, how many times it's involved in a link

frac_ec <- lapply(unique(edges_for_plot_ec$loc1, edges_for_plot_ec$loc2), function(x){
  edges_for_plot_ec %>% filter(pairwise_dist <= 10) %>% 
  mutate(conn = ifelse(loc1 == !!x | loc2 == !!x, 'connected', 'not')) %>% 
  group_by(conn, ST) %>% summarize(count = n()) %>% mutate(facil = x)
}) %>% bind_rows() %>% suppressMessages() %>% ungroup() %>% filter(!duplicated(.)) %>% 
  group_by(facil) %>% 
  mutate(n_facil = sum(count), frac = count/n_facil) %>% 
  ungroup() %>% 
  filter(conn == 'connected') 

frac_kp <- lapply(unique(edges_for_plot_kp$loc1, edges_for_plot_kp$loc2), function(x){
  edges_for_plot_kp %>% filter(pairwise_dist <= 10) %>% 
  mutate(conn = ifelse(loc1 == !!x | loc2 == !!x, 'connected', 'not')) %>% 
  group_by(conn, ST) %>% summarize(count = n()) %>% mutate(facil = x)
}) %>% bind_rows() %>% suppressMessages() %>% ungroup() %>% filter(!duplicated(.)) %>% 
  group_by(facil) %>% 
  mutate(n_facil = sum(count), frac = count/n_facil) %>% 
  ungroup() %>% 
  filter(conn == 'connected') 

max(frac_ec$frac)
## [1] 0.74
facils_ec <- frac_ec %>% pull(facil)
ec_perm_cts <- lapply(1:1000, function(x){
  sample(facils_ec, sum(frac_ec$count), replace = TRUE) %>% table() %>% data.frame() %>% rename(facil = '.') %>% 
    mutate(tot = sum(Freq), perm = x, frac = Freq/tot) %>% slice_max(Freq) %>% slice(1)
}) %>% bind_rows() 

max(frac_kp$frac)
## [1] 0.2727273
facils_kp <- frac_kp %>% pull(facil)
kp_perm_cts <- lapply(1:1000, function(x){
  sample(facils_kp, sum(frac_kp$count), replace = TRUE) %>% table() %>% data.frame() %>% rename(facil = '.') %>% 
    mutate(tot = sum(Freq), perm = x, frac = Freq/tot) %>% slice_max(Freq) %>% slice(1)
}) %>% bind_rows() 

fs12 <- bind_rows(ec_perm_cts %>% select(frac) %>% mutate(ST = 'ST171'),
          kp_perm_cts %>% select(frac) %>% mutate(ST = 'ST258')) %>% 
  mutate(ST = factor(ifelse(ST == 'ST171', '*E. hormaechei* ST171', '*K. pneumoniae* ST258'),
                     levels = c('*E. hormaechei* ST171', '*K. pneumoniae* ST258'))) %>% 
  ggplot(aes(x = frac)) +
  geom_histogram(binwidth = 0.05) +
  geom_point(data = tibble(frac = c(max(frac_ec$frac), max(frac_kp$frac)), 
                           st = c('*E. hormaechei* ST171', '*K. pneumoniae* ST258')), 
             aes(y = 0), col = 'red', shape = 18, size = 5) +
  facet_grid(~st, scales = 'free') +
  labs(x = 'Maximum proportion of edges connected to a given facility', y = 'Number of edges') +
  theme(strip.text.x = element_markdown())

sum(ec_perm_cts$frac > max(frac_ec$frac))/length(ec_perm_cts$frac)
## [1] 0
sum(kp_perm_cts$frac > max(frac_kp$frac))/length(kp_perm_cts$frac)
## [1] 0.301
fs12 <- plot_panel_cols(fs12, adjustcolor(st_cols[c('E. hormaechei ST171','K. pneumoniae ST258')], alpha.f = 0.5)) 

# ggsave(plot = fs12, filename = 'figures/FigS12.png', width = 7, height = 4)

fs12

Facility W (Figure S13)

g_mn <- mn_trans_net_sub %>% 
  select(source_facil, dest_facil) %>%
  as.matrix() %>% 
  graph_from_edgelist(directed = TRUE)

E(g_mn)$weight = mn_trans_net_sub %>% select(n_transfers) %>% deframe()

deg <- igraph::degree(g_mn)
stre <- strength(g_mn)
betw <- betweenness(g_mn)
clos <- closeness(g_mn, mode = 'all')

facil_w_ind <- which(names(V(g_mn)) == 'F38')

fs13 <- bind_cols(inds=names(deg), 'Degree'=deg, 'Strength'=stre, 'Betweenness'=betw, 'Closeness'=clos) %>% 
  pivot_longer(cols = -inds) %>% 
  ggplot(aes(x = value)) + geom_histogram(bins = 70) +
  geom_point(data = data.frame(
    value=c(deg[facil_w_ind], stre[facil_w_ind], betw[facil_w_ind],clos[facil_w_ind]),
    name=c('Degree','Strength','Betweenness','Closeness')),
    aes(x = value, y = 0), col = 'red', shape = 18, size = 3) +
  ggrepel::geom_text_repel(data = data.frame(
    value=c(deg[facil_w_ind], stre[facil_w_ind], betw[facil_w_ind],clos[facil_w_ind]), 
    name=c('Degree','Strength','Betweenness','Closeness')), 
    aes(label = 'W', y = 0), col = 'red', size = 3) +
  labs(x = 'Centrality metric', y = 'Number of facilities') +
  facet_wrap(~name, scales = 'free') +
  theme(plot.margin = margin(0,0.5,0,0, "cm"))

ggsave(plot = fs13, filename = 'figures/FigS13.png', width = 5, height = 3.5)

fs13

TN facility cluster (Figure 4)

prior_hosps_tn_counts <- prior_hosps_tn %>% 
  select(Sample_ID, Fac_ID) %>% 
  unique() %>% 
  rename(Facility = Fac_ID) %>% 
  bind_rows(prior_hosps_tn %>% 
              select(Sample_ID, Prior_facil) %>% 
              rename(Facility = Prior_facil)) %>% 
  left_join(facil_st %>% 
              mutate(ST = ifelse(ST %in% c('E. hormaechei ST171','E. hormaechei ST114',
                                 'K. pneumoniae ST258','K. pneumoniae ST307', 
                                 'E. cloacae ST171', 'E. cloacae ST114', 
                                 'ST258', 'ST307', 'ST114', 'ST171'),
                       ST, 'Other'))) %>% 
  filter(!is.na(ST)) %>% 
  group_by(ST, Facility) %>% 
  summarize(count = n()) %>% 
  arrange(-count) 

tn_hm <- prior_hosps_tn_counts %>% 
  pivot_wider(names_from = Facility, values_from = count, values_fill = 0) %>% 
  data.frame(row.names = .$ST) %>% 
  select(-ST) %>% 
  pheatmap(show_colnames = FALSE, fontsize_col = 8)

tn_facil_hm <- prior_hosps_tn_counts %>% 
  mutate(Facility = factor(Facility, levels = rev(tn_hm$tree_col$labels[tn_hm$tree_col$order]) %>% gsub('TDH.', 'TDH-', .) %>% gsub('LONG.', 'LONG-', .) %>% gsub('VA.', 'VA-', .) %>% gsub('\\.', ' ', .)),
         ST = factor(ST, levels = tn_hm$tree_row$labels),
         ST = gsub('E. hormaechei', '*E. hormaechei*', ST),
         ST = gsub('K. pneumoniae', '*K. pneumoniae*', ST)) %>% 
  ggplot(aes(x = Facility, y = ST, fill = count)) +
  geom_tile() +
  geom_rect(aes(xmin = 0+0.5, xmax = 7+0.5, ymin = 0+0.5, ymax = 3+0.5), fill = NA, col = 'black', size = 1, linetype = 'dotted') +
  scale_fill_viridis_c(direction = -1, breaks = scales::pretty_breaks(), option = 'magma') +
  theme_classic() +
  theme(axis.text.x = element_blank(), axis.text.y = element_markdown()) +
  labs(fill = 'Count')

tn_facil_hm

tn_facils_st <- facil_st %>% filter(State == 'TN') %>% 
  mutate(ST = ifelse(ST %in% c('E. hormaechei ST171','E. hormaechei ST114',
                                 'K. pneumoniae ST258','K. pneumoniae ST307', 
                                 'E. cloacae ST171', 'E. cloacae ST114', 
                                 'ST258', 'ST307', 'ST114', 'ST171'),
                       ST, 'Other')) %>% 
  filter(!is.na(Facility_ID)) %>% 
  group_by(Facility_ID, ST) %>% 
  summarize(count = n()) %>% 
  arrange(Facility_ID) %>% 
  group_by(Facility_ID) %>% 
  mutate(max_count = count[which.max(count)],
         n_isolates = sum(count)) %>% 
  group_by(Facility_ID, n_isolates) %>% 
  summarize(max_sts = str_c(ST[count == max_count], collapse = ', ')) %>% 
  left_join(tn_facil_cluster) %>% 
  mutate(facil_st_type = case_when(max_sts == 'K. pneumoniae ST258' ~ '*K. pneumoniae* ST258',
                                   grepl('K. pneumoniae ST258', max_sts) & max_sts != 'K. pneumoniae ST258' ~ '*K. pneumoniae* ST258 & Other',
                                   of_interest ~ 'ST307/ST114/Other facility cluster'),
         facil_st_type = ifelse(is.na(facil_st_type), 'Other', facil_st_type)) 
graph_tn <- tbl_graph(nodes = tn_facils_st, edges = tn_trans_net_sub %>% filter(!grepl('I', source_facil) & !grepl('I', dest_facil) & n_transfers > 1), directed = TRUE) %>%
  induced.subgraph(igraph::degree(.) > 0)

tn_network <- graph_tn %>% ggraph(layout = 'kk') + 
  geom_edge_link(aes(color = n_transfers, alpha = n_transfers), arrow = arrow(length = unit(3, 'mm'))) + 
  geom_node_point(aes(color = facil_st_type, size = n_isolates)) + 
  scale_edge_color_viridis(direction = -1, option = 'E') +
  scale_color_manual(values = c(unname(st_cols[1]), 'coral3', 'lightsalmon', 'grey')) +
  guides(edge_alpha = 'none') +
  theme_graph() +
  labs(color = 'Most common ST', shape = '', size = 'Number of isolates',
       edge_color = 'Number of transfers (in thousands)')  #+
  # theme(legend.text = element_markdown())
tn_network

tn_facil_transfers <- tn_trans_net_sub %>% 
  left_join(tn_facils_st %>% rename(source_of_interest = of_interest) %>% 
              select(Facility_ID, source_of_interest), 
            by = c(source_facil = 'Facility_ID')) %>% 
  left_join(tn_facils_st %>% rename(dest_of_interest = of_interest) %>% 
              select(Facility_ID, dest_of_interest), 
            by = c(dest_facil = 'Facility_ID')) %>% 
  replace(is.na(.), 0) %>% 
  mutate(of_interest = source_of_interest + dest_of_interest) %>% 
  filter(!grepl('I', source_facil) & !grepl('I', dest_facil) & n_transfers > 1) %>%
  ggplot(aes(x = n_transfers, linetype = factor(of_interest, levels = c(2, 1, 0)))) +
  geom_density() +
  labs(x = 'Number of patient transfers', y = 'Density', linetype = 'Number of isolates\nof pair in facility\ncluster') +
  theme(legend.position = 'left')

tn_facil_transfers

plot_grid(plot_grid(tn_facil_hm + theme(legend.position = 'left'), tn_facil_transfers, nrow = 2, align = 'hv', labels = c('A', 'C')), tn_network, nrow = 1, rel_widths = c(0.8, 1), labels = c('', 'B')) %>% 
  ggsave(filename = 'figures/Fig4_test.png', width = 12, height = 5)